package com.four.reptile.controller;

import com.four.reptile.config.GithubRepoPageProcessor;
import com.four.reptile.config.JsonFilePipeline;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import us.codecraft.webmagic.Spider;

/**
 * REST controller exposing a single endpoint that launches a WebMagic crawl.
 *
 * <p>Each GET request to the root path builds a new {@link Spider}, runs it,
 * and then answers with the literal string {@code "0"}.
 */
@RestController
public class ReptileController {

    /**
     * Seed page handed to the spider: a JD.com search results URL whose
     * {@code keyword} parameter is the URL-encoded search term.
     */
    private static final String SEED_URL =
            "https://search.jd.com/Search?keyword=%E5%8D%8E%E4%B8%BA&enc=utf-8&wq=%E5%8D%8E%E4%B8%BA&pvid=e8a41b67774d4aebb6b4e2d65190e8c8";

    /** Number of threads the spider is configured to crawl with. */
    private static final int CRAWLER_THREADS = 5;

    /**
     * Configures a spider and runs it on the request-handling thread.
     *
     * <p>The spider is created with a {@link GithubRepoPageProcessor}, seeded
     * with {@link #SEED_URL}, given a {@link JsonFilePipeline}, and set to use
     * {@link #CRAWLER_THREADS} threads before {@code run()} is called.
     *
     * @return the constant string {@code "0"}, sent once {@code run()} has returned
     */
    @GetMapping({"/",""})
    public String msg() {
        Spider crawler = Spider.create(new GithubRepoPageProcessor());
        // Page the crawl starts from.
        crawler.addUrl(SEED_URL);
        crawler.addPipeline(new JsonFilePipeline());
        // Crawl with five threads.
        crawler.thread(CRAWLER_THREADS);
        // Start the spider. NOTE(review): run() presumably blocks until the crawl
        // finishes, so the HTTP response is held back until then — confirm.
        crawler.run();
        return "0";
    }
}
